In [ ]:
# Dr. M. Baron, Statistical Machine Learning class, STAT-427/627
# GEOMETRY of LDA and QDA
# Load the Field Goal data directly from the URL
url = "http://fs2.american.edu/~baron/627/R/Field%20goals.txt"
FG = pd.read_csv(url, delim_whitespace=True, header=None)
# Assign the columns
distance = FG[0] # V1
made = FG[1] # V2
week = FG[2] # V3
In [10]:
FG.head
Out[10]:
<bound method NDFrame.head of 0 1 2 0 30 1 1 1 41 1 1 2 50 1 1 3 22 1 1 4 33 1 1 .. .. .. .. 943 42 1 17 944 46 1 17 945 47 1 17 946 52 0 17 947 51 0 17 [948 rows x 3 columns]>
In [7]:
# Plotting: week vs. distance, colored by whether the goal was made (green for made, red for missed)
plt.scatter(week, distance, c=made + 2, cmap='RdYlGn') # +2 shifts colors to make green/red
plt.xlabel('Week')
plt.ylabel('Distance')
plt.title('Field Goal Attempts (Red: Missed, Green: Made)')
plt.show()
In [8]:
# Apply LDA to predict success of a field goal attempt
lda = LDA()
lda.fit(FG[[0, 2]], made) # Use 'distance' and 'week' as predictors (V1, V3)
# Predict using LDA with cross-validation
y_pred_lda = lda.predict(FG[[0, 2]])
accuracy_lda = np.mean(y_pred_lda == made)
print(f"LDA Classification Rate: {accuracy_lda * 100:.2f}%")
LDA Classification Rate: 80.91%
In [15]:
# Plotting the LDA result
plt.scatter(week, distance, c=pd.factorize(y_pred_lda)[0] + 2, cmap='RdYlGn')
plt.xlabel('Week')
plt.ylabel('Distance')
plt.title('LDA Decision Boundary for Field Goals')
plt.show()
In [18]:
# Apply QDA to predict success of a field goal attempt
qda = QDA()
qda.fit(FG[[0, 2]], made) # Use 'distance' and 'week' as predictors (V1, V3)
# Predict using QDA with cross-validation
y_pred_qda = qda.predict(FG[[0, 2]])
# Plotting the QDA result
plt.scatter(week, distance, c=pd.factorize(y_pred_qda)[0] + 2, cmap='RdYlGn')
plt.xlabel('Week')
plt.ylabel('Distance')
plt.title('QDA Decision Boundary for Field Goals')
plt.show()
# A curvy boundary (QDA) is expected as opposed to the linear boundary (LDA)
In [ ]: